{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2024-03-17T03:43:30.849752700Z",
     "start_time": "2024-03-17T03:43:30.844778800Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import re\n",
    "# imports\n",
    "import javalang\n",
    "import os\n",
    "from app.database import Database\n",
    "from app.classes import IterationStatus, BatchAttemptStatus, TestStatus, AttemptStatus,Dataset\n",
    "from typing import List\n",
    "from app.constants import CONFIG\n",
    "import pickle\n",
    "import xml.etree.ElementTree as ET\n",
    "from app.utils import etree_to_dict\n",
    "from tqdm import tqdm\n",
    "from app.constants import CoverageMode\n",
    "\n",
    "BASE_DIR = r\"C:\\Users\\tianfy\\Desktop\\毕设\\数据集\\chatunitest\"\n",
    "DATASET = \"gson\"\n",
    "TEST_FOLDER = fr\"{BASE_DIR}\\chatunitest-tests-{DATASET}\"\n",
    "FILTERED_TEST_FOLDER = fr\"{BASE_DIR}\\chatunitest-tests-{DATASET}-filtered\"\n",
    "INFO_ZIP = fr\"{BASE_DIR}\\chatunitest-info_{DATASET}.zip\"\n",
    "CLOVER_XML_PATH = fr\"{BASE_DIR}\\{DATASET}.xml\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "217\n",
      "185\n"
     ]
    }
   ],
   "source": [
    "# template : ClassName_MethodName_MethodId_AttemptId_Test.java\n",
    "# example:  NumberUtils_min_23_0_Test.java\n",
    "# 递归遍历TEST_FOLDER，找到所有符合template的java文件\n",
    "def find_java_files(folder):\n",
    "    java_files = []\n",
    "    for root, dirs, files in os.walk(folder):\n",
    "        for file in files:\n",
    "            if file.endswith(\".java\"):\n",
    "                java_files.append(os.path.join(root, file))\n",
    "    return java_files\n",
    "\n",
    "\n",
    "java_files = find_java_files(TEST_FOLDER)\n",
    "print(len(java_files))\n",
    "pattern = \".*?_.*?_\\d+_\\d+_Test\\.java\"\n",
    "java_files = [file for file in java_files if re.match(pattern, file)]\n",
    "print(len(java_files))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T03:43:31.883986Z",
     "start_time": "2024-03-17T03:43:31.874125900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "outputs": [],
   "source": [
    "import zipfile\n",
    "import json\n",
    "import re\n",
    "\n",
    "\n",
    "def get_attempt_flag(records: list):\n",
    "    hasError =  records[-1][\"hasError\"]\n",
    "    if hasError:\n",
    "        return records[-1][\"errorMsg\"][\"errorType\"]\n",
    "    else:\n",
    "        return \"PASS\"\n",
    "\n",
    "class_map = {}\n",
    "\n",
    "with zipfile.ZipFile(INFO_ZIP, 'r') as z:\n",
    "    for item in z.namelist():\n",
    "        if re.match(r'.*attempt\\d+/$', item):\n",
    "            pattern = r'/class(\\d+)/method(\\d+)/attempt(\\d+)/'\n",
    "            match = re.search(pattern, item)\n",
    "\n",
    "            if match:\n",
    "                class_id = match.group(1)\n",
    "                method_id = match.group(2)\n",
    "                attempt_id = match.group(3)\n",
    "            else:\n",
    "                assert False\n",
    "\n",
    "            with z.open(f'{item}records.json') as f:\n",
    "                records = json.loads(f.read())\n",
    "                attempt_flag = get_attempt_flag(records)\n",
    "            with z.open(f'{item.replace(f\"attempt{attempt_id}/\", \"\")}attemptMapping.json') as f:\n",
    "                mapping = json.loads(f.read())\n",
    "                className = mapping['attempt0']['className']\n",
    "                testClassName = mapping['attempt0']['testClassName']\n",
    "                className = f\"{className}_{class_id}\"\n",
    "                if className not in class_map:\n",
    "                    class_map[className] = {}\n",
    "                if testClassName not in class_map[className]:\n",
    "                    class_map[className][testClassName] = []\n",
    "                class_map[className][testClassName].append(attempt_flag)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T03:43:32.629242400Z",
     "start_time": "2024-03-17T03:43:32.531606900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chatunitest total: 378\n",
      "Counter({'PASS': 188, 'COMPILE_ERROR': 105, 'RUNTIME_ERROR': 85})\n",
      "chatunitest valid unique: 167\n"
     ]
    }
   ],
   "source": [
    "from collections import Counter\n",
    "\n",
    "\n",
    "def shorten_key(key):\n",
    "    pattern = r\"(.*?)_(.*?)_\\d+_\\d+_Test\"\n",
    "    match = re.search(pattern, key)\n",
    "    if match:\n",
    "        return match.group(1) + \"#\" + match.group(2)\n",
    "    else:\n",
    "        assert False\n",
    "java_info_map = {k: v for class_name, class_info in class_map.items() for k, v in class_info.items() if k.split(\"_\")[0] != k.split(\"_\")[1]}\n",
    "shorten_java_info_map = Counter([shorten_key(k) for k in java_info_map.keys()])\n",
    "print(f\"chatunitest total: {len(java_info_map)}\")\n",
    "java_valid_map = {k: v for k, v in java_info_map.items() if v[-1] == \"PASS\"}\n",
    "print(\n",
    "    Counter([v[-1] for k, v in java_info_map.items()]),\n",
    ")\n",
    "shorten_java_valid_map = Counter([shorten_key(k) for k in java_valid_map.keys()])\n",
    "print(f\"chatunitest valid unique: {len(shorten_java_valid_map)}\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T03:43:33.132106100Z",
     "start_time": "2024-03-17T03:43:33.119681600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "expected:  114\n",
      "actual:  114\n"
     ]
    },
    {
     "data": {
      "text/plain": "set()"
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import shutil\n",
    "\n",
    "# java_files[0].replace(TEST_FOLDER, FILTERED_TEST_FOLDER)\n",
    "keys = set()\n",
    "\n",
    "for f in java_files:\n",
    "    key = f.split(\"\\\\\")[-1].split(\".\")[0]\n",
    "    if key in java_valid_map:\n",
    "        keys.add(key)\n",
    "        dst = f.replace(TEST_FOLDER, FILTERED_TEST_FOLDER)\n",
    "        os.makedirs(os.path.dirname(dst), exist_ok=True)\n",
    "        shutil.copy2(f, dst)\n",
    "print(\"expected: \", len(java_valid_map))\n",
    "print(\"actual: \", len(keys))\n",
    "set(java_valid_map.keys()) - keys"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-16T08:20:18.206772300Z",
     "start_time": "2024-03-16T08:20:18.054938Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dataset total: 378\n",
      "dataset unique: 325\n"
     ]
    }
   ],
   "source": [
    "import pickle\n",
    "from app.classes import Dataset\n",
    "from app.constants import CONFIG\n",
    "\n",
    "# read dataset\n",
    "with open(f\"{CONFIG.dataset_folder}/{DATASET}.pkl\", \"rb\") as f:\n",
    "    raw_dataset = pickle.load(f)\n",
    "    dataset = Dataset(**raw_dataset)\n",
    "\n",
    "dataset_info_list = []\n",
    "for class_info in dataset:\n",
    "    for method_info in class_info:\n",
    "        key = class_info.class_name + \"#\" + method_info.name\n",
    "        dataset_info_list.append(key)\n",
    "print(f\"dataset total: {len(dataset_info_list)}\")\n",
    "dataset_info_map = Counter(dataset_info_list)\n",
    "print(f\"dataset unique: {len(dataset_info_map)}\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T03:43:36.888882400Z",
     "start_time": "2024-03-17T03:43:36.866048Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "outputs": [],
   "source": [
    "# compare shorten_java_info_map and s\n",
    "for k, v in dataset_info_map.items():\n",
    "    if k not in shorten_java_info_map or shorten_java_info_map[k] != v:\n",
    "        print(k, v, shorten_java_info_map[k])\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T03:43:37.948032700Z",
     "start_time": "2024-03-17T03:43:37.940621700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "52it [00:00, 4000.80it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "192\n",
      "Total Branch Coverage:  (341, 461) 0.7396963123644251\n",
      "Total Line Coverage: (639, 801) 0.797752808988764\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "from app.classes import IterationStatus\n",
    "\n",
    "total_branch_tupe = (0, 0)\n",
    "total_line_tupe = (0, 0)\n",
    "count = 0\n",
    "\n",
    "tree = ET.parse(CLOVER_XML_PATH)\n",
    "# tree = ET.parse(fr\"D:\\IDEA_Projects\\TestJavaCode\\target\\site\\clover\\clover.xml\")\n",
    "root = tree.getroot()\n",
    "root_dict = etree_to_dict(root)['coverage']\n",
    "method_coverage = {} # {method_signature: (branch_rate, line_rate)}\n",
    "for class_info in tqdm(dataset):\n",
    "    for method_info in class_info:\n",
    "        if not f\"{class_info.class_name}#{method_info.name}\" in shorten_java_valid_map:\n",
    "            continue\n",
    "\n",
    "        count += 1\n",
    "        source_packages: list = root_dict['project'][0]['package']\n",
    "        # 找到focal class & focal method\n",
    "        source_package = None\n",
    "        for package in source_packages:\n",
    "            if class_info.package_reference == package['@name']:\n",
    "                source_package = package\n",
    "                break\n",
    "        assert source_package is not None\n",
    "\n",
    "        source_files = source_package['file']\n",
    "        source_file = None\n",
    "        for file in source_files:\n",
    "            if f\"{class_info.class_name}.java\" == file['@name']:\n",
    "                source_file = file\n",
    "                break\n",
    "        assert source_file is not None\n",
    "\n",
    "        source_report = source_file\n",
    "\n",
    "        report_lines = source_report['line']\n",
    "        branches = [line for line in report_lines if line['@type'] == 'cond']\n",
    "        lines = [line for line in report_lines if line['@type'] == 'stmt']\n",
    "\n",
    "        # 当测试某一个方法时，筛选出这个方法的line/branch\n",
    "        branches = [branch for branch in branches if\n",
    "                    method_info.start <= int(branch[\"@num\"]) <= method_info.end]\n",
    "        lines = [line for line in lines if\n",
    "                 method_info.start <= int(line[\"@num\"]) <= method_info.end]\n",
    "\n",
    "        uncovered_branch = []\n",
    "        false_uncovered_branch = []\n",
    "        true_uncovered_branch = []\n",
    "        unreachable_branch = []\n",
    "\n",
    "        for branch in branches:\n",
    "            if branch['@truecount'] == '0' or branch['@falsecount'] == '0':\n",
    "                uncovered_branch.append(branch)\n",
    "            if branch['@truecount'] != '0' and branch['@falsecount'] == '0':\n",
    "                false_uncovered_branch.append(branch)\n",
    "            if branch['@truecount'] == '0' and branch['@falsecount'] != '0':\n",
    "                true_uncovered_branch.append(branch)\n",
    "            if branch['@truecount'] == '0' and branch['@falsecount'] == '0':\n",
    "                unreachable_branch.append(branch)\n",
    "\n",
    "\n",
    "\n",
    "        branch_num_uncovered = len(false_uncovered_branch) + len(true_uncovered_branch) + 2 * len(unreachable_branch)\n",
    "        branch_num_total = 1 if len(branches) == 0 else len(branches) * 2\n",
    "        branch_num_covered = branch_num_total - branch_num_uncovered\n",
    "\n",
    "\n",
    "\n",
    "        uncovered_line = []\n",
    "        for line in lines:\n",
    "            if line['@count'] == '0':\n",
    "                uncovered_line.append(line)\n",
    "\n",
    "        line_num_uncovered = len(uncovered_line)\n",
    "        line_num_total = 1 if len(lines) == 0 else len(lines)\n",
    "        line_num_covered = line_num_total - line_num_uncovered\n",
    "\n",
    "\n",
    "        # if any([method_info.name.startswith(x) for x in [\"get\", \"set\", \"is\"]]) or method_info.name[0].isupper():\n",
    "        #     branch_num_covered = branch_num_total\n",
    "        #     line_num_covered = line_num_total\n",
    "\n",
    "        if line_num_covered == 0:\n",
    "            count -= 1\n",
    "            continue\n",
    "            branch_num_covered = 0\n",
    "\n",
    "        branch_rate_tuple = (branch_num_covered, branch_num_total, branch_num_covered / branch_num_total)\n",
    "        line_rate_tuple = (line_num_covered, line_num_total, line_num_covered / line_num_total)\n",
    "        # print(f\"Class: {class_info.class_name}, Method: {method_info.name}\")\n",
    "        # print(f\"Branch Coverage: {branch_rate_tuple}\")\n",
    "        # print(f\"Line Coverage: {line_rate_tuple}\")\n",
    "        method_coverage[f\"{class_info.class_name}#{method_info.signature}\"] = (branch_rate_tuple, line_rate_tuple)\n",
    "        total_branch_tupe = (total_branch_tupe[0] + branch_rate_tuple[0], total_branch_tupe[1] + branch_rate_tuple[1])\n",
    "        total_line_tupe = (total_line_tupe[0] + line_rate_tuple[0], total_line_tupe[1] + line_rate_tuple[1])\n",
    "print(count)\n",
    "print(f\"Total Branch Coverage:  {total_branch_tupe} {total_branch_tupe[0] / total_branch_tupe[1]}\")\n",
    "print(f\"Total Line Coverage: {total_line_tupe} {total_line_tupe[0] / total_line_tupe[1]}\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-17T03:45:17.797560600Z",
     "start_time": "2024-03-17T03:45:17.741668400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "outputs": [],
   "source": [
    "final_total_branch_tuple = (0,0)\n",
    "final_total_line_tuple = (0,0)\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-16T08:26:00.716532800Z",
     "start_time": "2024-03-16T08:26:00.710944Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total Branch Coverage:  (153, 177) 0.864406779661017\n",
      "Total Line Coverage: (272, 295) 0.9220338983050848\n"
     ]
    }
   ],
   "source": [
    "final_total_branch_tuple = (final_total_branch_tuple[0] + total_branch_tupe[0], final_total_branch_tuple[1] + total_branch_tupe[1])\n",
    "final_total_line_tuple = (final_total_line_tuple[0] + total_line_tupe[0], final_total_line_tuple[1] + total_line_tupe[1])\n",
    "print(f\"Total Branch Coverage:  {final_total_branch_tuple} {final_total_branch_tuple[0] / final_total_branch_tuple[1]}\")\n",
    "print(f\"Total Line Coverage: {final_total_line_tuple} {final_total_line_tuple[0] / final_total_line_tuple[1]}\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-03-16T08:26:01.219741300Z",
     "start_time": "2024-03-16T08:26:01.205741900Z"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
